{
    "cells": [
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "9080b39e",
            "metadata": {},
            "outputs": [],
            "source": [
                "import logging\n",
                "import sys\n",
                "\n",
                "# Send INFO-level log records to stdout so they show up in the cell output.\n",
                "# force=True (Python 3.8+) removes handlers already attached to the root logger,\n",
                "# leaving exactly one stdout handler: messages are not printed twice, the INFO\n",
                "# level is applied even if logging was configured earlier, and re-running this\n",
                "# cell does not stack additional handlers.\n",
                "logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "b4b4387b-413e-4016-ba1e-88b3d9410a38",
            "metadata": {},
            "outputs": [],
            "source": [
                "# Fetch the plain-text \"New York City\" page from Wikipedia and save it under data/.\n",
                "from pathlib import Path\n",
                "\n",
                "import requests\n",
                "\n",
                "http_response = requests.get(\n",
                "    'https://en.wikipedia.org/w/api.php',\n",
                "    params={\n",
                "        'action': 'query',\n",
                "        'format': 'json',\n",
                "        'titles': 'New York City',\n",
                "        'prop': 'extracts',\n",
                "        # 'exintro': True,\n",
                "        'explaintext': True,\n",
                "    },\n",
                "    timeout=30,  # do not hang the kernel forever on a stalled connection\n",
                ")\n",
                "http_response.raise_for_status()  # fail loudly on an HTTP error instead of parsing an error body\n",
                "response = http_response.json()\n",
                "# 'pages' is a mapping; only one title was requested, so take its first value.\n",
                "page = next(iter(response['query']['pages'].values()))\n",
                "nyc_text = page['extract']\n",
                "\n",
                "data_path = Path('data')\n",
                "data_path.mkdir(exist_ok=True)  # creates the directory only when it is missing\n",
                "\n",
                "# Write UTF-8 explicitly: the article text may contain non-ASCII characters, and the\n",
                "# platform default encoding is not guaranteed to be able to encode them.\n",
                "with open(data_path / 'nyc_text.txt', 'w', encoding='utf-8') as fp:\n",
                "    fp.write(nyc_text)"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "f1a9eb90-335c-4214-8bb6-fd1edbe3ccbd",
            "metadata": {},
            "outputs": [],
            "source": [
                "# OpenAI API key: keep a key that is already set in the environment; otherwise\n",
                "# prompt for it so the secret is never stored in the notebook file.\n",
                "import getpass\n",
                "import os\n",
                "\n",
                "if not os.environ.get('OPENAI_API_KEY'):\n",
                "    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API key: ')"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "8d0b2364-4806-4656-81e7-3f6e4b910b5b",
            "metadata": {},
            "outputs": [],
            "source": [
                "from llama_index import GPTTreeIndex, SimpleDirectoryReader"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "1298bbb4-c99e-431e-93ef-eb32c0a2fc2a",
            "metadata": {
                "tags": []
            },
            "outputs": [],
            "source": [
                "# Load documents from the 'data' directory and build a GPTTreeIndex from them.\n",
                "reader = SimpleDirectoryReader('data')\n",
                "documents = reader.load_data()\n",
                "index = GPTTreeIndex.from_documents(documents)"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "0b4fe9b6-5762-4e86-b51e-aac45d3ecdb1",
            "metadata": {},
            "outputs": [],
            "source": [
                "# Save the index to 'index.json' (path is relative to the working directory).\n",
                "index_file = 'index.json'\n",
                "index.save_to_disk(index_file)"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "5eec265d-211b-4f26-b05b-5b4e7072bc6e",
            "metadata": {},
            "outputs": [],
            "source": [
                "# Try loading the index back from 'index.json' into a separate object.\n",
                "saved_index_file = 'index.json'\n",
                "new_index = GPTTreeIndex.load_from_disk(saved_index_file)"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "68c9ebfe-b1b6-4f4e-9278-174346de8c90",
            "metadata": {
                "tags": []
            },
            "outputs": [],
            "source": [
                "# Author's observation: GPT doesn't find the corresponding evidence in the leaf node,\n",
                "# yet the answer it returns is still correct.\n",
                "# Switch logging to DEBUG for more detailed output.\n",
                "\n",
                "wnba_question = \"What is the name of the professional women's basketball team in New York City?\"\n",
                "new_index.query(wnba_question)"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "4fc3f18a-0ef9-453c-acf8-7aedd784cdcf",
            "metadata": {},
            "outputs": [],
            "source": [
                "# Author's observation: GPT doesn't find the corresponding evidence in the leaf node,\n",
                "# yet the answer it returns is still correct.\n",
                "# NOTE(review): confirm this observation holds for this particular question.\n",
                "# Switch logging to DEBUG for more detailed output.\n",
                "\n",
                "battles_question = \"What battles took place in New York City in the American Revolution?\"\n",
                "new_index.query(battles_question)"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "97f3ddf1-8dc2-4fb8-831f-2c06649e0955",
            "metadata": {},
            "outputs": [],
            "source": [
                "# Author's observation: GPT doesn't find the corresponding evidence in the leaf node,\n",
                "# yet the answer it returns is still correct.\n",
                "# NOTE(review): confirm this observation holds for this particular question.\n",
                "# Switch logging to DEBUG for more detailed output.\n",
                "\n",
                "airports_question = \"What are the airports in New York City?\"\n",
                "new_index.query(airports_question)"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "53265fd4-da98-4cf9-abfb-3f76105fd2ff",
            "metadata": {},
            "outputs": [],
            "source": [
                "# Ask the airports question again, this time passing mode=\"embedding\" to query().\n",
                "airports_question = \"What are the airports in New York City?\"\n",
                "new_index.query(airports_question, mode=\"embedding\")"
            ]
        }
    ],
    "metadata": {
        "kernelspec": {
            "display_name": "Python 3 (ipykernel)",
            "language": "python",
            "name": "python3"
        },
        "language_info": {
            "codemirror_mode": {
                "name": "ipython",
                "version": 3
            },
            "file_extension": ".py",
            "mimetype": "text/x-python",
            "name": "python",
            "nbconvert_exporter": "python",
            "pygments_lexer": "ipython3",
            "version": "3.10.10"
        }
    },
    "nbformat": 4,
    "nbformat_minor": 5
}